import scrapy
from salarys.items import SalarysItem
import datetime
import socket

class SalarysSpider(scrapy.Spider):
    """Scrape Java job listings from zhipin.com search result pages.

    Every listing found on a result page is emitted as one ``SalarysItem``
    carrying the location, required experience, salary range, company name
    and publish date, tagged with ``job='java'`` and ``origin='boss'``.
    """

    name = "salarys"

    # Result pages 1..30, each listed exactly once.  Scrapy's default
    # start-request generation does not de-duplicate start URLs, so a
    # repeated URL would be fetched and scraped twice.
    # NOTE: the URL literal is written inline because names defined in a
    # class body are not visible inside a comprehension's element expression.
    start_urls = [
        'https://www.zhipin.com/c101120100/?query=Java&page=' + str(page)
        for page in range(1, 31)
    ]

    def parse(self, response):
        """Yield one ``SalarysItem`` per job listing in *response*.

        Fields are read relative to each listing node, so the number of
        listings on the page does not matter and a listing with missing
        markup cannot shift the fields of the listings after it.

        Listings that lack a required field, or whose salary is not of the
        form ``<min>k-<max>k``, are logged and skipped instead of aborting
        the rest of the page.
        """
        listings = response.xpath(
            '//div[@class="job-list"]/ul/li/div[@class="job-primary"]'
        )
        for index, listing in enumerate(listings):
            item = self._build_item(listing)
            if item is None:
                self.logger.warning(
                    "Skipping unparsable listing %d on %s", index, response.url
                )
                continue
            yield item

    def _build_item(self, listing):
        """Return a ``SalarysItem`` for one listing node, or ``None``.

        ``None`` is returned when any required piece of markup is missing
        or the salary text cannot be parsed.
        """
        # Text nodes of the summary paragraph; the first is used as the
        # location and the second as the experience requirement.
        summary = listing.xpath(
            './div[@class="info-primary"]/p/text()'
        ).extract()
        salary = listing.xpath(
            './div[@class="info-primary"]//span[@class="red"]/text()'
        ).extract_first()
        cname = listing.xpath(
            './div[@class="info-company"]//h3[@class="name"]/a/text()'
        ).extract_first()
        published = listing.xpath(
            './div[@class="info-publis"]//p/text()'
        ).extract_first()

        if len(summary) < 2 or salary is None or cname is None or published is None:
            return None

        salary_range = self._parse_salary(salary)
        if salary_range is None:
            return None

        item = SalarysItem()
        item['local'] = summary[0]
        item['exp'] = summary[1].replace("年", "")
        item['min_salary'], item['max_salary'] = salary_range
        item['cname'] = cname
        # Turn e.g. "发布于5月20日" into "5-20".
        item['publish_time'] = (
            published.replace("发布于", "").replace("月", "-").replace("日", "")
        )
        item['job'] = 'java'
        item['origin'] = 'boss'
        return item

    @staticmethod
    def _parse_salary(salary):
        """Parse ``"<min>k-<max>k"`` into a ``(min, max)`` tuple of ints.

        Returns ``None`` when the text has no ``-`` separator or either
        bound is not an integer once the ``k`` suffix is removed.
        """
        bounds = salary.replace("k", "").split("-")
        if len(bounds) < 2:
            return None
        try:
            return int(bounds[0]), int(bounds[1])
        except ValueError:
            return None
